package service

// ChunkType is a string-backed enumeration of chunk categories.
// The values declared in this file are listed in the const block below.
// NOTE(review): the code that assigns a ChunkType to a chunk is not visible
// in this file — confirm the classification rules against that code.
type ChunkType string

// Chunk categories declared for ChunkType.
const (
	ChunkTypeNumberSequence ChunkType = "number_sequence" // number sequence
	ChunkTypePunctuation    ChunkType = "punctuation"     // punctuation marks
	ChunkTypeFragment       ChunkType = "fragment"        // sentence fragment
	ChunkTypeVeryShort      ChunkType = "very_short"      // very short content
	ChunkTypeNormal         ChunkType = "normal"          // normal content
)

// ChunkMergeReason is a string-backed label naming the reason for a chunk merge.
// NOTE(review): the code that records these reasons is not visible in this
// file — confirm when each value is used against the merging logic.
type ChunkMergeReason string

// Merge reasons declared for ChunkMergeReason.
const (
	ReasonSemanticSimilarity ChunkMergeReason = "semantic_similarity"     // semantic similarity
	ReasonSmartMerge         ChunkMergeReason = "smart_merge"             // smart merge
	ReasonForceMerge         ChunkMergeReason = "force_merge"             // forced merge
	ReasonBelowMinThreshold  ChunkMergeReason = "below_minimum_threshold" // below the minimum threshold
	ReasonFinalValidation    ChunkMergeReason = "final_validation"        // final validation
)

// ChunkingConfig holds the tunable thresholds for chunking.
//
// ForceMinChunkSize is the forced-merge size threshold and
// SemanticSimilarityThreshold is the semantic-similarity threshold.
// DefaultChunkingConfig sets them to 100 and 0.75 respectively.
// NOTE(review): the code that consumes these fields is not visible in this
// file — confirm the comparison direction and the size unit against it.
type ChunkingConfig struct {
	ForceMinChunkSize           int     // forced-merge threshold (default 100 characters)
	SemanticSimilarityThreshold float32 // semantic similarity threshold (default 0.75)
}

// DefaultChunkingConfig returns a pointer to a newly allocated ChunkingConfig
// with ForceMinChunkSize set to 100 and SemanticSimilarityThreshold set to 0.75.
// Each call returns a separate value, so callers may modify the result freely.
func DefaultChunkingConfig() *ChunkingConfig {
	cfg := new(ChunkingConfig)

	// NOTE(review): the original comment equates 100 characters with roughly
	// 50 Chinese characters; whether the size is counted in bytes, runes or
	// display columns is not established here — confirm against the consumer.
	cfg.ForceMinChunkSize = 100

	// Default semantic similarity threshold.
	cfg.SemanticSimilarityThreshold = 0.75

	return cfg
}